﻿#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import re
import urllib2
from BeautifulSoup import BeautifulSoup

from additional import only_numbers

def easy_download(url):
	"""Download *url* and return the raw response body.

	The request is made with a 5 second timeout. This is a best-effort
	helper: when the URL cannot be opened or read, a message is printed
	and None is returned instead of raising.
	"""
	try:
		urlobject = urllib2.urlopen(url, timeout=5)
		try:
			return urlobject.read()
		finally:
			# Always release the connection, even when read() fails.
			urlobject.close()
	except Exception:
		# Deliberately not a bare "except:": KeyboardInterrupt and
		# SystemExit must still propagate to the caller.
		print("\tCould not open '%s'" % url)
		return


def is_html(urlobject):
	"""Return True when the response declares an HTML content type.

	*urlobject* is a response whose ``info()`` result exposes the raw
	header lines as a ``headers`` list (as the object returned by
	``urllib2.urlopen`` is used elsewhere in this file).

	Only the Content-Type header is inspected, and the comparison is
	case-insensitive. Looking at every header would wrongly accept
	non-HTML responses that merely mention "html" somewhere else, for
	example in a Link or Set-Cookie header.
	"""
	for header_line in urlobject.info().headers:
		name, separator, value = header_line.partition(':')
		if not separator:
			# Not a "Name: value" line (e.g. a continuation line).
			continue
		if name.strip().lower() == 'content-type' and 'html' in value.lower():
			return True
	return False

def soup_from_url(url):
	"""Fetch *url* and return its content parsed by BeautifulSoup.

	The request is made with a 5 second timeout. This is a best-effort
	helper: a message is printed and None is returned when

	* the URL cannot be opened or read,
	* ``is_html`` rejects the response, or
	* BeautifulSoup fails to parse the downloaded content.
	"""
	try:
		urlobject = urllib2.urlopen(url, timeout=5)
		try:
			if not is_html(urlobject):
				print("\tPage '%s' is not a text or html document." % url)
				return
			urlcontent = urlobject.read()
		finally:
			# Release the connection on every path, including the
			# early return for non-HTML responses.
			urlobject.close()
	except Exception:
		# Not a bare "except:", so Ctrl-C and SystemExit still propagate.
		print("\tCould not open '%s'" % url)
		return

	try:
		soup = BeautifulSoup(urlcontent)
	except Exception:
		print("\tCould not parse page '%s'" % url)
		return
	return soup

# Deprecated
def newest_news_nr():
    """Return a number scraped from http://wmi.amu.edu.pl/, plus 5.

    The page is fetched with ``soup_from_url``. The function then takes
    the first ``<a>`` inside the first ``<ol>``, keeps the last
    '/'-separated segment of its ``href``, and reads the first run of
    digits in that segment as an integer.

    NOTE(review): the reason for adding 5 is not visible in this file.
    No error handling is done here: if the page cannot be fetched or
    does not have the expected structure, the attribute/key lookups
    below raise.
    """
    page = soup_from_url('http://wmi.amu.edu.pl/')
    first_link = page.find('ol').find('a')
    last_segment = first_link['href'].split('/')[-1]
    digits = re.search(r"(\d)+", last_segment)
    return int(digits.group(0)) + 5
